*****************************************************************
* Replication directory for                                   ***
* Prime locations                                             ***
* by Gabriel M. Ahlfeldt, Thilo N.H. Albers, Kristian Behrens ***
* Published in American Economic Review: Insights             ***
*****************************************************************
* 01/2025
* Stata
version 17.0

* This do files uses cluster generated in C++ (lines 2-17 of Algorithm 1) to 
* complete the deliniation of prime locations (lines 18-34 of Algorithm 1)
* This do file is run using clusters generated based on big data establishments
* with employment weights estimated from the second half of cities

* Employment type identifier to 1 for big data establishment employment with weights from second half of cities
	global ET = 1

* Create temporary space for output
	capture mkdir "$temp/grid_PL_output_ET${ET}"	
	
* Loop over CBSAs
	qui u "$data_USMETROS/Raw Numeric Data/METRO LIST/METROS.dta", clear	
	
* Sample for weights creation from the second half of the alphabet excludes the median 33460 since it was used to estiamte employment weights
	local USMETROIDS 12060  12420  14460  16740  16980 17140  17460  18140  19100  19740 19820  26420  26900  28140  29820 30460  31080  33100  33340 		
	
* Set counter and begin loop
	local count = 1
	* Begin loop by metro 
			foreach USmid of local USMETROIDS{	
			di "...Working on CBSA `USmid', number `count' of `Ncbsafp'..."
			u "$dataoutput/USMetroGridEmp/EmpType_${ET}/EmpGrid_`USmid'", clear	// Using baseline clusters (total emp, standard p-value)
		
			* Fix special cases ************************************************
				 qui sum clusterID if clusterID != . // If there are no clusters
					local MaxCluster = r(max) 
					qui sum grid_x 
					local mingridX = round(r(min),0)
					qui sum grid_y if grid_x ==  `mingridX'
					local mingridY = round(r(min),0)
					if `MaxCluster' == 0 {
						 qui replace clusterID = 1 if grid_x == `mingridX' & grid_y == `mingridY'
						 qui replace developable = 1 if grid_x == `mingridX' & grid_y == `mingridY'
						 qui replace cell_total_emp = 999 if grid_x == `mingridX' & grid_y == `mingridY'
					}
					else {
						}
				qui sum clusterID if clusterID != .  // If there are too many clusters
					local MaxCluster = r(max) 
					qui sum grid_x 
					local mingridX = round(r(min),0)
					qui sum grid_y if grid_x ==  `mingridX'
					local mingridY = round(r(min),0)
					if `MaxCluster' > 50 {
						 qui replace clusterID = 0
						 qui replace clusterID = 1 if grid_x == `mingridX' & grid_y == `mingridY'
						 qui replace developable = 1 if grid_x == `mingridX' & grid_y == `mingridY'
						 qui replace cell_total_emp = 999 if grid_x == `mingridX' & grid_y == `mingridY'
					}
					else {
						}
			
			
			* DATA PREP ********************************************************
				qui ren cell_total_emp emp
			* Cluster employment relative to max PL employment in city
				qui gen TrimmedCluster = clusterID > 0							// Trimmed cluster variable
				qui egen Cemp = sum(emp), by(metro_id clusterID)
				qui egen Carea = count(emp), by(metro_id clusterID)
				qui gen Cempdens = 	Cemp  / Carea	
				qui egen CempMax = max(Cemp) if clusterID != 0, by(metro_id)	// Employment of largest cluster in metro
				qui gen CempRel = Cemp/CempMax 									// Cluster employment relative to largest cluster in metro
				qui replace TrimmedCluster = 0 if CempRel < $PLtresh			// Restrict to clusters with minimum relative employment

			* Ceneterate trimmed cluster variables	
				qui gen TrimmedClusterID = clusterID*TrimmedCluster if clusterID > 0 // Trimmed clusters identified
				qui egen TrimmedClusterX = mean(grid_x) if TrimmedCluster > 0, by(TrimmedClusterID metro_id)
				qui egen TrimmedClusterY = mean(grid_y) if TrimmedCluster > 0, by(TrimmedClusterID metro_id)		
	
			* AGGREGATION ******************************************************
				qui sum metro_id
				global MetroMin=r(min)
				global MetroMax=r(max)
				foreach num of numlist $MetroMin / $MetroMax { 					// This programme can process several metros. 
				local O = 999
				local i = 1
				while `O' > 0 {
				display "...round `i' of city `num'..."
				qui gen TrimmedClusterIDold = TrimmedClusterID // save old ID
				AGGCLUSTER2 `num' $DISTtresh
				qui gen test = abs(TrimmedClusterIDold-TrimmedClusterID) 		// generate objective
				qui sum test
				local O = r(sum)
				qui drop TrimmedClusterIDold test
				local i = `i'+1
				}
				}
				* Generate PL ID
				capture gen PLID = .											// gen PL ID variable
				qui replace PLID = TrimmedClusterID 

			* GENERATING CONVEX HULL *******************************************
				foreach num of numlist $MetroMin / $MetroMax { 					// loop over metros
				levelsof PLID if metro_id == `num', local(levels) 				// get list of PL IDs in a metro
				foreach pl of numlist `levels' {								// loop over PL IDs
					CH `num' `pl' 2
				}																// PL loop closes
				}																// metro loop closes
				
				* Clean temp folder 
				preserve 
					filelist, dir("$temp/PLtemp")
					levelsof filename, local(files)
					qui foreach f of local files{
						erase "$temp/PLtemp/`f'"
					}
				restore 
				
			* SPLIT PLs if parts are non-contiguous ****************************
				SPLITTER $SplitDist	
				
			* Generate PL variables	gen PL = PLID>0 & PLID != .
				qui gen PL = PLID>0 & PLID != .
				qui gen PL_x = grid_x if PL == 1
				qui gen PL_y = grid_y if PL == 1
				qui egen PL_emp = sum(emp) if PL == 1,by(metro_id PLID)
				qui egen PL_area = sum(area_g) if PL == 1,by(metro_id PLID)
				qui gen PL_empdens = PL_emp / PL_area
				qui egen PLX = mean(grid_x) if PLID > 0 & PLID != ., by(PLID metro_id)
				qui egen PLY = mean(grid_y) if PLID > 0 & PLID != ., by(PLID metro_id)		
				qui egen metro_emp = sum(emp) ,by(metro_id )

			* Flag as pseudo-PLs an drop unless the only one in city ***********************
				qui gen PPL = PL_emp < 10000  									// If PL has not enough employment
				preserve // Compute rank
				qui keep PL PPL PLID cbsafp PL_emp
					qui drop if PL == 0
					qui duplicates drop
					qui egen PLrankByCity = rank(PL_emp), by(cbsafp) field
					qui save "$temp/PLID_metro_rank_temp.dta", replace
				restore
				qui merge m:1 PLID cbsafp using "$temp/PLID_metro_rank_temp.dta", keepusing(PLrankByCity)	
				qui drop _m
				qui erase "$temp/PLID_metro_rank_temp.dta"
				qui  foreach var of varlist PL* {								// remove pseudo PLs, except if they are the larges PPL in the metro
					qui replace `var' = . if PPL == 1 & PLrankByCity != 1
				}
				qui replace PL = 0 if PL == .
				qui drop PL_emp // Now need to update PL_emp
				qui egen PL_emp = sum(emp) if PL == 1, by(cbsafp PLID)
				
			* Generate TS employment share within and outside PLs
				qui egen TSemp_insidePL = sum(cell_TS_emp) if PL == 1, by(cbsafp)
				qui egen TSemp_outsidePL = sum(cell_TS_emp) if PL == 0, by(cbsafp)
				foreach name in insidePL outsidePL {
					egen temp = mean(TSemp_`name'), by(cbsafp)
					drop TSemp_`name'
					ren temp TSemp_`name'
				}
				qui gen TSempshare_insidePL = TSemp_insidePL / TSemp_insidePL * 100
					label var TSempshare_insidePL "TS share within PLs (%)"
				qui gen TSempshare_outsidePL = TSemp_outsidePL / (metro_emp-TSemp_insidePL) *100
					label var TSempshare_outsidePL "TS share outside PLs (%)"
			* MAP PL
				local title = metro_name[1]
				twoway  	(scatter grid_y grid_x , msymbol(s) mlcolor(none) msize(tiny) mcolor(black*0.25)) ///
						(scatter grid_y grid_x if developable == 1, msymbol(s) mlcolor(none) msize(tiny) mcolor(black*0.4)) ///
						(scatter grid_y grid_x if emp > 0 , msymbol(s) mlcolor(none) msize(tiny) mcolor(black*0.75)) ///
						(scatter grid_y grid_x if  PL == 1 , msize(tiny)  mlcolor(none)  msymbol(s) mcolor(red*0.5)) ///
						(scatter PLY PLX , msize(tiny) msymbol(s)  mlcolor(none)  mcolor(red) mlabel(PLrankByCity) mlabcolor(red)) ///
						, legend(off) xsize(5) ysize(5) xlabel(0[10000]70000)		ylabel(0[10000]70000) graphregion(color(white))	title("`title'")
				capture mkdir "$temp/figures_App/US-CBSAs"
				capture mkdir "$temp/figures_App/US-CBSAs/ET${ET}"
				capture mkdir "$temp/figures_App/US-CBSAs/ET${ET}/MAPS"
				qui graph export "$temp/figures_App/US-CBSAs/ET${ET}/MAPS/MAPS_PL_TotalEmp_BaseP_`USmid'.png", replace height(1000) width(1000)
		
			
			* Save outputs *****************************************************
				qui drop TrimmedCluster Cemp Carea Cempdens CempMax CempRel TrimmedClusterID TrimmedClusterX TrimmedCluster
				qui save "$temp/grid_PL_output_ET${ET}/grid_PL_output_ET${ET}_`USmid'.dta", replace
				qui keep if PL == 1
				qui save "$temp/grid_PL_output_ET${ET}/PL_grids_ET${ET}_`USmid'", replace
			*Reset counter
				local `count' = `count' +1
			}
	* Loop ends
	
* Append outputs of loops into metro-grid files
		display "...compiling employment metro-grid data..."
		display `USMETROIDS'
		clear
		foreach USmid of local USMETROIDS{
			qui append using "$temp/grid_PL_output_ET${ET}/grid_PL_output_ET${ET}_`USmid'.dta"
			}	
			qui save "$temp/grid_PL_output_ET${ET}/grid_PL_output_ET${ET}__all.dta", replace
		clear 
		foreach USmid of local USMETROIDS{
			qui append using "$temp/grid_PL_output_ET${ET}/PL_grids_ET${ET}_`USmid'"
			}	
			qui keep if PL == 1
			qui save "$temp/grid_PL_output_ET${ET}/PL_grids_ET${ET}__all.dta", replace


* Script ends
